******************************************************************************************
* ONLINE APPENDIX FIGURE A10
* EFFECT OF THE VENEZUELAN IMMIGRATION ALONG THE DISTRIBUTION OF NATIVES' WAGES 
* 
* ALSO NEED: ControlsDustmann2013.do
* 
* LAST MODIFIED: JULY, 2025
******************************************************************************************

clear all 

* INSTALL SCHEMES FOR GRAPHS
*ssc install blindschemes, replace all 
*ssc install schemepack, replace /*based on https://github.com/asjadnaqvi/Stata-schemes*/

**# DIRECTORIES

* PAPER DIRECTORY DROPBOX
global fiscal "/Users/andres/Library/CloudStorage/Dropbox/2 Papers/2023/Paper_Fiscal_Costs"

* SUBDIRECTORIES
global data "$fiscal/1 Data/12 Final Datasets"
global programs "$fiscal/2 Programs/2 Figures and Tables paper"
global results "$fiscal/3 Results"

**# LOAD DATASET AND CREATE NEW VARIABLES

* LOAD DATASET WITH VARIABLES NEEDED
use "$data/LaborEffects.dta", clear

**#SAMPLE SELECTION 

* ONLY METROPOLITAN AREAS 
drop if area >= 81 /* not observations for these areas*/
drop if area == 25 /* not observations */
keep if area !=. 

* DUMMIES FOR YEAR AND MSA
tab year, gen(year)
tab area, gen(area)

* KEEP ONLY WORKING-AGE POPULATION
drop if (age<15|age>64) 

* KEEP IF NOT ENROLLED IN SCHOOL
*keep if att_school == 2 

* KEEP THOSE THAT WORK LESS THAN 100 HOURS A WEEK  
*drop if weekly_hours>100

* DELIMIT THE ANALYSIS FOR THE YEARS 2013-2018. 
drop if year > 2018

* DISPLAY FORMAT FOR SAMPLING WEIGHTS 
format %12.0f fex12

* GENERATE ROUNDED WEIGHTS FOR SPECIFIC COMMANDS THAT REQUIRE THEM 
gen fex12_round = fex12
replace fex12_round = round(fex12_round) 

**# GLOBAL MACROS FOR VARIABLES
global controls  i.sex1 age c.age#c.age educ2-educ5

**#  HANDLING OUTLIERS
* OTHER WAY TO CHECK OUTLIERS IS WITH SPIKEPLOT  
levelsof year, local(year)
foreach j of local year {
display "this is year `j'"
centile hourly_real_wages if year == `j', cent(0.5 99.5)
return list 
local cent1 = r(c_1)
display `cent1'
local cent99 = r(c_2)
display `cent99'
* DROP TAILS 
drop if hourly_real_wages > `cent99'  
drop if hourly_real_wages < `cent1' 
}


**# RESIDUALIZATION 

**## STRATEGY 1: RESIDUALIZE BY YEAR 

/*
gen res_logWages = 0
levelsof year, local(year)
foreach j of local year {
display "This is the year `j'"
capture {
regress log_hourly_real_wages $controls if year == `j' & emp  == 1 & immig == 0 & area != .  [w=fex12], cluster(area)  
predict res_logWages_`j' if year == `j', residuals 
replace res_logWages = res_logWages_`j' if year == `j'
}
}
drop res_logWages_2013-res_logWages_2019
label var res_logWages "Residualized log wages"
*/

**##  STRATEGY 2: RESIDUALIZE ALL SAMPLE AS IN BRETRAND 2004
regress log_hourly_real_wages $controls if emp  == 1 & immig == 0 & area != .  [pw = fex12], cluster(area)  
predict res_logWages, residuals 
label var res_logWages "Residualized log wages"


**# CREATE # OBSERVATIONS BY QUANTILE FOR EACH AREA IN 2013 TO WEIGHT REGRESSIONS BY QUANTILES

* GENERATE OBSERVATIONS BY QUANTILES (5(5)95)
gen pctilesResiWagesArea2013 = .
levelsof area, local(area)
foreach j of local area {
xtile pctilesResiWages`j' = res_logWages if year == 2013 & area == `j' [pw = fex12], nquantiles(19) 
replace  pctilesResiWagesArea2013 = pctilesResiWages`j' if year == 2013 & area == `j'	
}

* COLLAPSE DATASET WITH NUMBER OF OBSERVATIONS PER QUANTILE PER AREA IN 2013
preserve
keep area year pctilesResiWagesArea2013 fex12
drop if year > 2013
drop year
gen percentiles2013 = pctilesResiWagesArea2013
* COUNT OBSERVATIONS PER QUANTILE AND AREA AND ADJUST FOR POPULATION WEIGHTS 
collapse (count) percentiles2013 [iw = fex12], by(area pctilesResiWagesArea2013)
drop if  pctilesResiWagesArea2013 == .
rename percentiles2013 obsResiWagePctil
gen copy_pctiles = pctilesResiWagesArea2013
* RECODE TO FACILITATE INTERPRETATION 
replace pctilesResiWagesArea2013 = 5 if copy_pctiles == 1
replace pctilesResiWagesArea2013 = 10 if copy_pctiles == 2
replace pctilesResiWagesArea2013 = 15 if copy_pctiles == 3
replace pctilesResiWagesArea2013 = 20 if copy_pctiles == 4
replace pctilesResiWagesArea2013 = 25 if copy_pctiles == 5
replace pctilesResiWagesArea2013 = 30 if copy_pctiles == 6
replace pctilesResiWagesArea2013 = 35 if copy_pctiles == 7
replace pctilesResiWagesArea2013 = 40 if copy_pctiles == 8
replace pctilesResiWagesArea2013 = 45 if copy_pctiles == 9
replace pctilesResiWagesArea2013 = 50 if copy_pctiles == 10
replace pctilesResiWagesArea2013 = 55 if copy_pctiles == 11
replace pctilesResiWagesArea2013 = 60 if copy_pctiles == 12
replace pctilesResiWagesArea2013 = 65 if copy_pctiles == 13
replace pctilesResiWagesArea2013 = 70 if copy_pctiles == 14
replace pctilesResiWagesArea2013 = 75 if copy_pctiles == 15
replace pctilesResiWagesArea2013 = 80 if copy_pctiles == 16
replace pctilesResiWagesArea2013 = 85 if copy_pctiles == 17
replace pctilesResiWagesArea2013 = 90 if copy_pctiles == 18
replace pctilesResiWagesArea2013 = 95 if copy_pctiles == 19
drop copy_pctiles
* RESHAPE TO CREATE COUNT OF PERCENTILES PER OBSERVATION AS VARIABLES
reshape wide obsResiWagePctil, i(area) j(pctilesResiWagesArea2013) 
* SAVE DATASET
tempfile weights_pctiles
save `weights_pctiles'
restore 

**# AGGREGATE RESIDUAL WAGES BY PERCENTILE  FOR EACH YEAR AND METROPOLITAN AREA

* COLLAPSE DATASET BY PERCENTILE OF THE RESIDUAL WAGES, YEAR, AND MSA
collapse ///
(p5) res_logWages5 = res_logWages  ///
(p10) res_logWages10 = res_logWages ///
(p15) res_logWages15 = res_logWages  ///
(p20) res_logWages20 = res_logWages  ///
(p25) res_logWages25 = res_logWages ///
(p30) res_logWages30 = res_logWages ///
(p35) res_logWages35 = res_logWages  ///
(p40) res_logWages40 = res_logWages  ///
(p45) res_logWages45 = res_logWages  ///
(p50) res_logWages50 = res_logWages  ///
(p55) res_logWages55 = res_logWages  ///
(p60) res_logWages60 = res_logWages ///
(p65) res_logWages65 = res_logWages  ///
(p70) res_logWages70 = res_logWages  ///
(p75) res_logWages75 = res_logWages ///
(p80) res_logWages80 = res_logWages  ///
(p85) res_logWages85 = res_logWages ///
(p90) res_logWages90 = res_logWages ///
(p95) res_logWages95 = res_logWages ///
(first) totpop13_wa [iw = fex12], by(year area)
drop if area== .


**# ADDITIONAL DATASETS 

**## SAVE DATASET WITH SHARES, INSTRUMENTS, AND GDP
preserve 
use "$data/LaborEffects.dta", clear
collapse (first) mig_share2013 IV2005 IVdist gdp, by(year area)
*SAVE DATASET
tempfile rightside
save `rightside'
restore 

* MERGE WITH `rightside' DATASET
merge 1:1 year area using  `rightside' , nogenerate 

* MERGE WITH QUANTILE WEIGHTS  
merge m:1 area using  `weights_pctiles' , nogenerate 

* DROP OBSERVATIONS FOR PERIODS WITH NOT IMMIGRANT INFORMATION 
drop if mig_share2013 == .


**## CREATE AGGREGATED CONTROLS A LA DUSTMAN 2013 
preserve
do "$programs/ControlsDustmann2013.do"
* CREATE A TEMPFILE TO SAVE
tempfile DustControls
save `DustControls'
restore 

**## FINAL DATASET FOR QUANTILE REGRESSIONS
* MERGE WITH CURRENT DATASET
merge 1:1 year area using  `DustControls', nogenerate 

**# ESTIMATIONS
 
* RESTRICT SAMPLE TO PERIOD 2013-2018
drop if year == 2019 

* DUMMIES
tab year, gen(year)
tab area, gen(area)

* GLOBAL FOR CONTROLS A LA DUSTMANN (INCLUDED AS FD)
global controlsDust MeanAgeNatives MeanAgeImmFromVenz ln_highTolowNatives ln_interTolowNatives

* SET PANEL AND TIME VAR
tsset area year

**## CREATE MATRIX TO SAVE OLS AND IV RESULTS WITH T-BASED CIs 
matrix OLS = J(19,4,.)
matrix rownames OLS = p5 p10 p15 p20 p25 p30 p35 p40 p45 p50 p55 p60 p65 p70 p75 p80 p85 p90 p95
matrix colnames OLS = pctile b ll ul 
 
matrix IV1 = J(19,4,.)
matrix rownames IV1 = p5 p10 p15 p20 p25 p30 p35 p40 p45 p50 p55 p60 p65 p70 p75 p80 p85 p90 p95
matrix colnames IV1 = pctile b ll ul 

matrix IV2 = J(19,4,.)
matrix rownames IV2 = p5 p10 p15 p20 p25 p30 p35 p40 p45 p50 p55 p60 p65 p70 p75 p80 p85 p90 p95
matrix colnames IV2 = pctile b ll ul 

matrix list OLS
matrix list IV1
matrix list IV2

local i = 1
foreach j of numlist 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 {

display "This is percentile `i'"

**## OLS 
ivreg2 res_logWages`j' mig_share2013 year2-year6 area2-area23  [aw = obsResiWagePctil`j'], cluster(area) small partial(area2-area23)
matrix OLS[`i',1] = `j'
matrix OLS[`i',2] = _b[mig_share2013]
matrix OLS[`i',3] = _b[mig_share2013 ] - invttail(e(df_r),0.025)*_se[mig_share2013]
matrix OLS[`i',4] = _b[mig_share2013 ] + invttail(e(df_r),0.025)*_se[mig_share2013]

**## IV1 (ENCLAVE INSTRUMENT)
ivreg2 res_logWages`j' (mig_share2013 = IV2005) year2-year6 area2-area23  [aw = obsResiWagePctil`j'], cluster(area) first small partial(area2-area23)
matrix IV1[`i',1] = `j'
matrix IV1[`i',2] = _b[mig_share2013]
matrix IV1[`i',3] = _b[mig_share2013 ] - invttail(e(df_r),0.025)*_se[mig_share2013]
matrix IV1[`i',4] = _b[mig_share2013 ] + invttail(e(df_r),0.025)*_se[mig_share2013]

**## IV2 (DISTANCE INSTRUMENT)
ivreg2 res_logWages`j' (mig_share2013 = IVdist) year2-year6 area2-area23  [aw = obsResiWagePctil`j'], cluster(area) first small partial(area2-area23)
matrix IV2[`i',1] = `j'
matrix IV2[`i',2] = _b[mig_share2013]
matrix IV2[`i',3] = _b[mig_share2013 ] - invttail(e(df_r),0.025)*_se[mig_share2013]
matrix IV2[`i',4] = _b[mig_share2013 ] + invttail(e(df_r),0.025)*_se[mig_share2013]

local ++i 
}

matrix list OLS
matrix list IV1
matrix list IV2

**# PLOTTING EFFECTS ALONG THE DISTRIBUTION OF WAGES 

**## OLS
coefplot matrix(OLS[,2]), ci((OLS[,3] OLS[,4])) ///
vertical ///
scheme(plottig) xtitle("Percentile") ///
ciopts(recast(rline ,) lpattern(dash)) ///
recast(connected,) ///
yline(0, lcolor(red) lpattern(dash)) ///
xlabel(5 "p25" 10 "p50" 15 "p75" 18 "p90", labsize(small)) ///
title("A. OLS", size(medium)) fysize(50) fxsize(80)
tempfile results1
graph save "`results1'"

**## IV1 (ENCLAVE INSTRUMENT) -> TRADITIONAL CIs 
coefplot matrix(IV1[,2]), ci((IV1[,3] IV1[,4])) ///
vertical ///
scheme(plottig) xtitle("Percentile") ///
ciopts(recast(rline, ) lpattern(dash)) ///
recast(connected,) ///
yline(0, lcolor(red) lpattern(dash)) ///
xlabel(5 "p25" 10 "p50" 15 "p75" 18 "p90", labsize(small)) ///
title("B. IV: enclave", size(medium)) fysize(50) fxsize(80)
tempfile results2
graph save "`results2'"

**## IV2 (DISTANCE INSTRUMENT) -> TRADITIONAL CIs
coefplot matrix(IV2[,2]), ci((IV2[,3] IV2[,4])) ///
vertical ///
scheme(plottig) xtitle("Percentile") ///
ciopts(recast(rline, ) lpattern(dash)) ///
recast(connected,) ///
yline(0, lcolor(red) lpattern(dash)) ///
xlabel(5 "p25" 10 "p50" 15 "p75" 18 "p90", labsize(small)) ///
title("C. IV: distance", size(medium)) fysize(50) fxsize(80)
tempfile results3
graph save "`results3'"


**# FIGURE A10 ONLINE APPENDIX 
graph combine "`results1'" "`results2'" "`results3'", /// 
graphregion(color(white)) cols(3) rows(1)  ///
imargin(0 0 0)  ///
ycommon xcommon ///
scheme(plottig)
qui graph save "$results/1 Figures Paper/FigA10_1.gph", replace 
qui graph export "$results/1 Figures Paper/FigA10_1.eps", as(eps) replace		 


